#loading packages
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 4.0.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(lubridate)
library(highcharter)
## Warning: package 'highcharter' was built under R version 4.3.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(patchwork)
library(readxl)
# Load the income-inequality data and give the columns snake_case names.
# The dotted names on the right-hand side are the ones read.csv() exposes for
# this file (presumably headers containing spaces, "%" and parentheses that
# were sanitised on import -- confirm against the CSV if it changes).
global_income = read.csv("global_income_inequality.csv") |>
  rename(
    country = Country,
    year = Year,
    population = Population,
    gini_index = Gini.Index,
    average_income_USD = Average.Income..USD.,
    top10_income_share = Top.10..Income.Share....,
    bottom10_income_share = Bottom.10..Income.Share....,
    income_group = Income.Group
  ) |>
  # The previous group_by(country, year) |> ungroup() pair performed no grouped
  # computation; its only effect was turning the data.frame into a tibble, which
  # is now stated directly.
  as_tibble()
# Country lookup table: lower-case the key columns and shorten three long
# official country names so they match the spelling targeted by the join on
# `country` further down. Every other country name is kept unchanged.
Country_data = read_excel("Country_data.xlsx") |>
  rename(country = Country, region = Region) |>
  mutate(
    country = case_match(
      country,
      "United Kingdom of Great Britain and Northern Ireland" ~ "United Kingdom",
      "Russian Federation" ~ "Russia",
      "United States of America" ~ "United States",
      .default = country
    )
  )
# Attach the country-level columns to every income row. This is an inner join,
# so rows whose `country` has no match in the other table are dropped.
global_income = inner_join(global_income, Country_data, by = join_by(country))
# Count the missing (NA) entries in a vector.
#   col: the vector (e.g. one data-frame column) to inspect.
# Returns the number of elements for which is.na() is TRUE.
sum_miss = function(col) {
  col |>
    is.na() |>
    sum()
}
# Missing-value count for every column, returned as a list named by column.
map(global_income, sum_miss)
## $country
## [1] 0
##
## $year
## [1] 0
##
## $population
## [1] 0
##
## $gini_index
## [1] 0
##
## $average_income_USD
## [1] 0
##
## $top10_income_share
## [1] 0
##
## $bottom10_income_share
## [1] 0
##
## $income_group
## [1] 0
##
## $`Country code`
## [1] 0
##
## $region
## [1] 0
# Gini index against average income: one point per row, coloured by region and
# sized by population. The `frame = year` aesthetic is carried along for
# ggplotly(), which turns the static plot into an interactive one.
p1 = ggplot(
  data = global_income,
  mapping = aes(gini_index, average_income_USD, color = region, frame = year)
) +
  geom_point(mapping = aes(size = population))
ggplotly(p1)
# Gini index against the income share of the top 10%: one point per row,
# coloured by region and sized by population. The `frame = year` aesthetic is
# carried along for ggplotly(), which renders the interactive version.
p2 = ggplot(
  data = global_income,
  mapping = aes(gini_index, top10_income_share, color = region, frame = year)
) +
  geom_point(mapping = aes(size = population))
ggplotly(p2)
# Gini index against the income share of the bottom 10%: one point per row,
# coloured by region and sized by population. The `frame = year` aesthetic is
# carried along for ggplotly(), which renders the interactive version.
# NOTE: this plot previously mapped y to top10_income_share, which made it an
# exact duplicate of p2 and left bottom10_income_share unused.
p3 = global_income |>
  ggplot(aes(x = gini_index, y = bottom10_income_share, color = region, frame = year)) +
  geom_point(aes(size = population))
ggplotly(p3)
# Names of the three countries with the highest Gini index among the 2023 rows.
top3_unequal = global_income |>
  filter(year == 2023) |>
  arrange(desc(gini_index)) |>
  slice_head(n = 3) |>
  pull(country)

# Names of the three countries with the lowest Gini index among the 2023 rows.
bottom3_unequal = global_income |>
  filter(year == 2023) |>
  arrange(gini_index) |>
  slice_head(n = 3) |>
  pull(country)
# Gini index over time, one line per country, for the three countries with the
# highest 2023 Gini index.
# NOTE(review): the titles used to read "Average income over time ..." although
# the y axis is gini_index; the titles now describe what is plotted. If average
# income was the intended series, change `y` instead.
global_income |>
  filter(country %in% top3_unequal) |>
  hchart(type = "line", hcaes(x = year, y = gini_index, group = country)) |>
  hc_title(text = "Gini index over time by country (Most disparities)")

# Same chart for the three countries with the lowest 2023 Gini index.
global_income |>
  filter(country %in% bottom3_unequal) |>
  hchart(type = "line", hcaes(x = year, y = gini_index, group = country)) |>
  hc_title(text = "Gini index over time by country (Least disparities)")
## Potential visuals
# Distribution of the Gini index per year, drawn as horizontal boxplots.
# The group_by(year) that used to precede ggplot() was removed: the boxes are
# already split by the discrete x value, so the dplyr grouping had no effect.

# Years after 2019.
c1 = global_income |>
  filter(year > 2019) |>
  ggplot(aes(x = as.factor(year), y = gini_index)) +
  geom_boxplot() +
  coord_flip()

# Years 2015 through 2019.
c2 = global_income |>
  filter(year <= 2019, year >= 2015) |>
  ggplot(aes(x = as.factor(year), y = gini_index)) +
  geom_boxplot() +
  coord_flip()

# patchwork: stack the later-years panel on top of the earlier-years panel.
c1 / c2
# Yearly mean and median of the Gini index across all rows, reshaped to long
# form so each statistic becomes its own line in the chart.
global_income |>
  group_by(year) |>
  summarise(
    mean_gini = mean(gini_index),
    median_gini = median(gini_index)
  ) |>
  pivot_longer(cols = !year, names_to = "type", values_to = "value") |>
  hchart("line", hcaes(x = year, y = value, group = type))
# Yearly mean and median of average income across all rows, reshaped to long
# form so each statistic becomes its own line in the chart.
global_income |>
  group_by(year) |>
  summarise(
    mean_income = mean(average_income_USD),
    median_income = median(average_income_USD)
  ) |>
  pivot_longer(cols = !year, names_to = "type", values_to = "value") |>
  hchart("line", hcaes(x = year, y = value, group = type))
# Mean Gini index per year for the "High Income" and "Low Income" groups, one
# line per income group.
# `.groups = "drop"` returns an ungrouped summary, matching the region
# summaries elsewhere in this script, and silences the "`summarise()` has
# grouped output" message this step used to emit.
# Filtering first is equivalent here because the filter is on a grouping
# column, and it avoids summarising groups that are discarded anyway.
global_income |>
  filter(income_group %in% c("High Income", "Low Income")) |>
  group_by(year, income_group) |>
  summarize(
    mean_income = mean(average_income_USD),
    mean_gini = mean(gini_index),
    .groups = "drop"
  ) |>
  hchart(type = "line", hcaes(x = year, y = mean_gini, group = income_group))
# Gini index per year for the United States as a column chart. Each column is
# coloured green ("#34A853") when the Gini index is below 0.3 and red
# ("#EA4335") otherwise.
global_income |>
  filter(country == "United States") |>
  mutate(g_color = if_else(gini_index < 0.3, "#34A853", "#EA4335")) |>
  hchart("column", hcaes(x = year, y = gini_index, color = g_color))
# Mean average income per year for the "NA" and "LATAM" regions, one line per
# region. Note that "NA" here is a literal region code string, not a missing
# value.
global_income |>
  filter(region %in% c("NA", "LATAM")) |>
  group_by(year, region) |>
  summarise(
    mean_income = mean(average_income_USD),
    mean_gini = mean(gini_index),
    .groups = "drop"
  ) |>
  hchart("line", hcaes(x = year, y = mean_income, group = region))
# Mean average income per year for the "EMEA" and "APAC" regions, one line per
# region.
global_income |>
  filter(region %in% c("EMEA", "APAC")) |>
  group_by(year, region) |>
  summarise(
    mean_income = mean(average_income_USD),
    mean_gini = mean(gini_index),
    .groups = "drop"
  ) |>
  hchart("line", hcaes(x = year, y = mean_income, group = region))